import re
from bs4 import  BeautifulSoup
import  requests
from lxml import etree
from multiprocessing import Pool
# try:
#     html2 = etree.parse('flower.html')
#     result = etree.tostring(html2, pretty_print=True)
#     print(result)
# except Exception as e:
#     print(e)
# pool =Pool(processes=2)
# #pool.map()
# #pool.close()
#
#
# text='''
#   <div>
#   <ul>
#     <li class="red"><h1>red flowers</h1></li>
#     <li class="white"><h2>white flowers</h2></li>
#     <li class="black"><h3>black flowers</h3></li>
#     <li class="blue"><h4>blue flowers</h4></li>
#     <li class="red"><h5>red flowers</h5></li>
#   </ul
#   </div>
# '''
# html=etree.HTML(text)
# result=etree.tostring(html)
# print(result)
# Fetch the Douban "Top 250 books" page, parse it with lxml and print the
# re-serialized document.

# Browser-like User-Agent sent with the request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
}

# The dict must be passed as ``headers=``: the second positional argument of
# ``requests.get`` is ``params``, which would URL-encode the User-Agent into
# the query string instead of sending it as an HTTP header.
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get('http://book.douban.com/top250', headers=header, timeout=10)

# Parse the response body as HTML into an element tree.
htmls = etree.HTML(res.text)

# Serialize the parsed tree back to markup and print it.
result = etree.tostring(htmls)
print(result)


# A very useful feature of the lxml library is that it automatically repairs malformed HTML.

